*********************************************************************************
*Replication script for: Höhne, J.K., Lenzner, T., & Claassen, J. (under review). 
*Automatic speech-to-text transcription: Evidence from a smartphone survey with 
*voice answers. International Journal of Social Research Methodology.
*********************************************************************************

****************
*loading dataset
****************

*Read the replication data; the file name carries no directory, so it is
*looked up in the current working directory. -clear- drops any data in memory.
use "voice_answers_speech_to_text.dta", clear


***********************
*Sample characteristics
***********************

*Number of respondents per group
tabulate group

*Two-sample t test of age across groups
ttest age, by(group)

*Cross-tabulations of group by categorical covariates (row percentages, Pearson chi-squared)
foreach covariate in gender education {
    tabulate group `covariate', row chi2
}

*Two-sample t tests of the remaining covariates across groups
foreach covariate in smartphone_skills internet_usage {
    ttest `covariate', by(group)
}


*******************************************
*keep only voice group for further analyses
*******************************************

*Remove every observation that is not in group 2 (observations with a missing group are removed as well)
drop if group != 2


************************************************
*Coder agreement for OPQ1 and OPQ2, respectively
************************************************

*Note: Multiple error types could occur in one transcript. To calculate coder
*agreement, we generated a unique code for each combination of error types.
*These codes are not labeled because they are used only to calculate Cohen's Kappa.

*Pool Whisper and Google: retain ID plus the block of coder variables
*(everything stored from OPQ1_google_quality_coder_1 through OPQ2_whisper_error_coder_2)
keep ID OPQ1_google_quality_coder_1-OPQ2_whisper_error_coder_2

*Stack the two ASR systems on top of each other; the part of each variable
*name that replaces @ (e.g. _google) is stored in the string variable ASR
reshape long ///
    OPQ1@_quality_coder_1 OPQ1@_quality_coder_2 ///
    OPQ2@_quality_coder_1 OPQ2@_quality_coder_2 ///
    OPQ1@_error_coder_1   OPQ1@_error_coder_2   ///
    OPQ2@_error_coder_1   OPQ2@_error_coder_2   ///
    , i(ID) j(ASR) string

*Kappa between coder 1 and coder 2 for each question:
*quality codes first (research question 1), then error codes (research question 2)
foreach measure in quality error {
    foreach item in OPQ1 OPQ2 {
        kap `item'_`measure'_coder_1 `item'_`measure'_coder_2
    }
}

*Reload the data from disk and re-apply the group filter to undo the keep/reshape above
use voice_answers_speech_to_text.dta, clear
keep if group == 2


*****************************************************
*Coder agreement for Whisper and Google, respectively
*****************************************************

*Pool OPQ1 and OPQ2: retain ID plus the block of coder variables
*(everything stored from OPQ1_google_quality_coder_1 through OPQ2_whisper_error_coder_2)
keep ID OPQ1_google_quality_coder_1-OPQ2_whisper_error_coder_2

*Stack the two questions on top of each other; the variable-name prefix that
*replaces @ (e.g. OPQ1_) is stored in the string variable question
reshape long ///
    @google_quality_coder_1  @google_quality_coder_2  ///
    @whisper_quality_coder_1 @whisper_quality_coder_2 ///
    @google_error_coder_1    @google_error_coder_2    ///
    @whisper_error_coder_1   @whisper_error_coder_2   ///
    , i(ID) j(question) string

*Kappa between coder 1 and coder 2 for each ASR system:
*quality codes first (research question 1), then error codes (research question 2)
foreach measure in quality error {
    foreach system in whisper google {
        kap `system'_`measure'_coder_1 `system'_`measure'_coder_2
    }
}

*Reload the data from disk and re-apply the group filter to undo the keep/reshape above
use voice_answers_speech_to_text.dta, clear
keep if group == 2

************************************************************
*Aggregating OPQ1 and OPQ2 (F8 and F10) for further analyses
************************************************************

*Stack the question-specific variables into long format: one row per
*respondent (ID) and question. The variable-name prefix that replaces @
*is stored in the string variable question.
reshape long ///
    @google_quality @whisper_quality ///
    @google_error_1 @google_error_2 @google_error_3 @google_error_4 ///
    @google_error_5 @google_error_6 @google_error_7 ///
    @whisper_error_1 @whisper_error_2 @whisper_error_3 @whisper_error_4 ///
    @whisper_error_5 @whisper_error_6 @whisper_error_7 ///
    @google_quality_coder_1 @google_quality_coder_2 ///
    @whisper_quality_coder_1 @whisper_quality_coder_2 ///
    @google_error_coder_1 @google_error_coder_2 ///
    @whisper_error_coder_1 @whisper_error_coder_2 ///
    , i(ID) j(question) string


********************
*Research question 1
********************

*Frequency table of the quality rating per ASR system, leaving out the codes -77 and 4
foreach system in google whisper {
    tabulate `system'_quality if !inlist(`system'_quality, -77, 4)
}


********************
*Research question 2
********************

*Frequency table of each of the seven error indicators per ASR system,
*restricted to non-negative codes (Google first, then Whisper)
foreach system in google whisper {
    forvalues k = 1/7 {
        tabulate `system'_error_`k' if `system'_error_`k' >= 0
    }
}



